home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Language/OS - Multiplatform Resource Library
/
LANGUAGE OS.iso
/
oper_sys
/
emerald
/
emrldsys.lha
/
Language
/
Compiler
/
scan.c
< prev
next >
Wrap
C/C++ Source or Header
|
1990-08-16
|
10KB
|
452 lines
/*
* @(#)scan.c 1.5 10/6/87
*/
#include "assert.h"
#include "nodes.h"
#include "tokens.h"
#include "system.h"
#include "MyParser.h"
#include "keyword.h"
#include "error.h"
/*
 * One token of lookahead: Scanner_Accept() copies the "next" token into
 * the "current" one and then scans a fresh "next" token.
 */
Token currentToken, nextToken;
/* Values that accompany currentToken and nextToken. */
NodePtr currentTokenValue, nextTokenValue;
/* Defined elsewhere; yylex() stores the value of the token it returns here. */
extern NodePtr yylval;
/* Placeholder kept in nextToken by scan() until a scan routine sets a real token. */
#define TNO -1
/* Stream that readLine() reads the source text from. */
FILE *inputFile;
/* Not referenced by the code in this file; presumably maintained by other modules. */
char *currentFileName = NULL;
/* Copied from nextLineNumber every time Scanner_Accept() runs. */
int currentLineNumber = 1;
/* Incremented each time the character reader fetches a new line. */
int nextLineNumber = 0;
/*
 * Nonzero when the next character fetch must first read a new line;
 * set after a '\n' has been delivered.
 */
int needIncLineNumber = 1;
/* Text of the token being scanned; rewound by Scanner_Accept(). */
static TokenBuffer nextTokenBuffer;
/*
 * Forward declarations of the per-character-class scan routines stored in
 * charRoutines[].  All of them are file-local.
 *
 * This must be ONE declaration: the list used to be cut in two by a stray
 * ';' after ScanOperator(), which left ScanLsquare() ... ScanNL() declared
 * with implicit int and external linkage, in conflict with their
 * "static int" definitions further down.
 */
static int
	ScanIllegal(), ScanLetter(), ScanColon(), ScanLparen(),
	ScanRparen(), ScanDigit(), ScanDot(), ScanStringquote(),
	ScanCharquote(), ScanComma(), ScanComment(), ScanOperator(),
	ScanLsquare(), ScanRsquare(),
	ScanLcurly(), ScanRcurly(), ScanDollar(),
	ScanEof(), ScanWhite(), ScanNL();
/*
 * Grow a token buffer that has filled up.
 *
 * The capacity (bufferEnd - buffer) is doubled: new storage is allocated,
 * the old contents are copied into it and the old storage is freed.  On
 * return fillPtr addresses the first byte past the old capacity, i.e. the
 * buffer is assumed to have been completely full, which is the only
 * situation in which collect() calls this routine.
 *
 * A buffer whose capacity is still zero gets a small initial capacity
 * instead; doubling zero would never make room and the caller would then
 * store past the end of the allocation.
 *
 * NOTE(review): a failed malloc() is still not handled - this file shows no
 * out-of-memory policy to follow.  Confirm what the rest of the compiler
 * does and apply it here.
 */
static void extend(tb)
register TokenBufferPtr tb;
{
	register char *newBuffer;
	register int oldLength;
	register int newLength;

	oldLength = tb->bufferEnd - tb->buffer;
	newLength = (oldLength > 0) ? 2 * oldLength : 16;

	newBuffer = (char *) malloc((unsigned) newLength);
	if (oldLength > 0) {
		bcopy(tb->buffer, newBuffer, oldLength);
	}
	free(tb->buffer);

	tb->buffer = newBuffer;
	tb->fillPtr = newBuffer + oldLength;
	tb->bufferEnd = newBuffer + newLength;
}
/*
 * Append character c to token buffer tb, calling extend() first when the
 * buffer is full.
 *
 * The expansion is a single statement (do { ... } while (0)), so the macro
 * is safe as the body of an unbraced if/else or loop; write the usual
 * trailing ';' at the point of use.  tb is evaluated more than once and
 * must have no side effects; c is evaluated exactly once, after any
 * extension of the buffer.
 */
#define collect(tb, c) \
	do { \
		if ((tb)->fillPtr >= (tb)->bufferEnd) { \
			extend(tb); \
		} \
		*(tb)->fillPtr++ = (c); \
	} while (0)
/*
 * Return a freshly malloc'ed copy of the NUL-terminated string S.
 *
 * The caller owns the result.  Returns NULL when the allocation fails
 * (previously the copy was attempted into the null pointer).  S itself must
 * not be NULL.
 */
char *strdup(S)
char *S;
{
	register char *str;
	register int length;

	length = strlen(S) + 1;		/* include the terminating NUL */
	str = (char *) malloc((unsigned) length);
	if (str != NULL) {
		bcopy(S, str, length);
	}
	return(str);
}
/* This is the scanner which presents a token stream to its caller. */
/*
 * Lookahead character that the scan routines examine; -1 once the end of
 * the input has been seen.  It starts out as a blank.
 */
static int nextChar = ' ';
/* The source line currently being scanned, as filled in by readLine(). */
TokenBuffer lineBuffer;
/* Value positionInLine had when scan() last dispatched a scan routine. */
int currentPosition = 0;
/*
 * Index into lineBuffer.buffer of the most recently fetched character;
 * readLine() resets it to -1 for each new line.
 */
static int positionInLine = -1;
/*
 * Read the next line of inputFile into lineBuffer.
 *
 * Characters are stored up to and including the terminating '\n'; when the
 * input runs out, the EOF value returned by getc() is stored as well
 * (narrowed to a char).  A NUL is appended after the last stored character
 * and positionInLine is reset so that the next fetch starts at index 0.
 */
static void readLine()
{
	register int ch;

	positionInLine = -1;
	lineBuffer.fillPtr = lineBuffer.buffer;
	for (;;) {
		ch = getc(inputFile);
		collect(&lineBuffer, ch);
		if (ch == EOF || ch == '\n') {
			break;
		}
	}
	collect(&lineBuffer, '\0');
}
/*
 * MgetNextChar() - advance to the next input character and leave it in
 * nextChar (-1 at end of input).
 *
 * When the previous character was a newline (needIncLineNumber set) a new
 * line is read first and nextLineNumber is bumped.  Characters with any bit
 * above the low seven set are reported through ScanIllegal(), which skips
 * them by fetching again.
 *
 * Two fixes relative to the earlier version:
 *  - needIncLineNumber is updated BEFORE ScanIllegal() is called.  The
 *    nested fetch done by ScanIllegal() sets the flag for the character it
 *    delivers; updating it afterwards overwrote that value, so a newline
 *    directly after an illegal character was not noticed and scanning ran
 *    on past the end of the line buffer.
 *  - once the EOF marker stored by readLine() has been reached the position
 *    is not advanced any further, so repeated fetches keep yielding -1
 *    instead of reading beyond what readLine() stored.
 */
#define MgetNextChar() {\
	register int c; \
	if (needIncLineNumber) { \
		nextLineNumber++; \
		readLine(); \
	} \
	if (positionInLine < 0 || lineBuffer.buffer[positionInLine] != EOF) { \
		++positionInLine; \
	} \
	c = lineBuffer.buffer[positionInLine]; \
	if (c == EOF) nextChar = -1; \
	else { \
		nextChar = c; \
		needIncLineNumber = (c == '\n'); \
		if (c & 0xffffff80) ScanIllegal(); \
	} \
}
/* Function form of MgetNextChar(); see the description above. */
void getNextChar()
{
	MgetNextChar();
}
typedef int CharClasses;
/*
 * We are changing things so that operation names can be made up of strange
 * characters.  Basically these things are now treated like identifiers, with
 * keywords and all.  The legal operator characters are
 *	= ! > < | & + - / * # @ ? ^ ~ .
 * NOTE(review): '.' appears in that list, but the table below classifies it
 * as CDOT, not COPERATOR.
 *
 * The class numbers double as indices into charRoutines[].
 */
#define CILLEGAL 0
#define CLETTER 1
#define CCOLON 2
#define CLPAREN 3
#define CRPAREN 4
#define CDIGIT 5
#define CDOT 6
#define CSTRINGQUOTE 7
#define CCHARQUOTE 8
#define CCOMMA 9
#define CCOMMENT 10
#define COPERATOR 11
#define CLSQUARE 12
#define CRSQUARE 13
#define CLCURLY 14
#define CRCURLY 15
#define CDOLLAR 16
#define CEOF 17
#define CWHITE 18
#define CNL 19
/*
 * Historical: end of file used to be recognised by indexing charClass[-1]
 * and relying on this object sitting immediately in front of the table.
 * Nothing depends on that any more; the object is kept only because
 * Scanner_Initialize() still assigns to it.
 */
static CharClasses junk[1] = { CEOF };
/*
 * Character classification.  charClass[c] is valid for c in -1 .. 127:
 * index -1 (the value nextChar holds at end of input) yields CEOF, and
 * 0 .. 127 are the ASCII codes.  The extra slot lives inside the same array
 * so the -1 lookup no longer depends on how the compiler lays out separate
 * objects.
 */
static CharClasses charClassTable[1 + 128] = {
	/* -1: end of input */
	CEOF,
	/* 0x00-0x07 */
	CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,
	/* 0x08-0x0f: BS TAB LF VT FF CR SO SI */
	CILLEGAL,CWHITE,CNL,CILLEGAL,CWHITE,CWHITE,CILLEGAL,CILLEGAL,
	/* 0x10-0x17 */
	CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,
	/* 0x18-0x1f */
	CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,
	/* 0x20-0x27: space ! " # $ % & ' */
	CWHITE,COPERATOR,CSTRINGQUOTE,COPERATOR,CDOLLAR,CCOMMENT,COPERATOR,CCHARQUOTE,
	/* 0x28-0x2f: ( ) star plus comma minus dot slash */
	CLPAREN,CRPAREN,COPERATOR,COPERATOR,CCOMMA,COPERATOR,CDOT,COPERATOR,
	/* 0x30-0x37: 0 1 2 3 4 5 6 7 */
	CDIGIT,CDIGIT,CDIGIT,CDIGIT,CDIGIT,CDIGIT,CDIGIT,CDIGIT,
	/* 0x38-0x3f: 8 9 : ; < = > ? */
	CDIGIT,CDIGIT,CCOLON,CILLEGAL,COPERATOR,COPERATOR,COPERATOR,COPERATOR,
	/* 0x40-0x47: @ A B C D E F G */
	COPERATOR,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,
	/* 0x48-0x4f: H I J K L M N O */
	CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,
	/* 0x50-0x57: P Q R S T U V W */
	CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,
	/* 0x58-0x5f: X Y Z [ backslash ] ^ _ */
	CLETTER,CLETTER,CLETTER,CLSQUARE,CILLEGAL,CRSQUARE,COPERATOR,CLETTER,
	/* 0x60-0x67: ` a b c d e f g */
	CILLEGAL,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,
	/* 0x68-0x6f: h i j k l m n o */
	CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,
	/* 0x70-0x77: p q r s t u v w */
	CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,
	/* 0x78-0x7f: x y z { | } ~ DEL */
	CLETTER,CLETTER,CLETTER,CLCURLY,COPERATOR,CRCURLY,COPERATOR,CILLEGAL
};
/* Points at the entry for character code 0, so that charClass[-1] is the CEOF slot. */
static CharClasses *charClass = &charClassTable[1];
/*
 * Dispatch table used by scan(): indexed by the character class of
 * nextChar, it yields the routine that scans a token starting with a
 * character of that class.  The entries must stay in the numeric order of
 * the C* class constants (CILLEGAL == 0 ... CNL == 19).
 */
static int (*charRoutines[])() = {
/* CILLEGAL */ ScanIllegal,
/* CLETTER */ ScanLetter,
/* CCOLON */ ScanColon,
/* CLPAREN */ ScanLparen,
/* CRPAREN */ ScanRparen,
/* CDIGIT */ ScanDigit,
/* CDOT */ ScanDot,
/* CSTRINGQUOTE */ ScanStringquote,
/* CCHARQUOTE */ ScanCharquote,
/* CCOMMA */ ScanComma,
/* CCOMMENT */ ScanComment,
/* COPERATOR */ ScanOperator,
/* CLSQUARE */ ScanLsquare,
/* CRSQUARE */ ScanRsquare,
/* CLCURLY */ ScanLcurly,
/* CRCURLY */ ScanRcurly,
/* CDOLLAR */ ScanDollar,
/* CEOF */ ScanEof,
/* CWHITE */ ScanWhite,
/* CNL */ ScanNL };
/*
 * Accept the lookahead token: the "next" token, its value and its line
 * number become the "current" ones, the token text buffer is rewound, and
 * scan() is called to produce a new lookahead token.
 */
void Scanner_Accept()
{
	/* Shift the lookahead into the current slot. */
	currentLineNumber = nextLineNumber;
	currentTokenValue = nextTokenValue;
	currentToken = nextToken;

	/* Start the new token with an empty text buffer and no value yet. */
	nextTokenValue = (NodePtr) -1;
	nextTokenBuffer.fillPtr = nextTokenBuffer.buffer;

	scan();
}
/*
 * Handler for characters of class CILLEGAL (also invoked from
 * MgetNextChar() for characters outside the 7-bit range): report the
 * character through IllegalCharacter() and skip it.  No token is produced
 * and no value is returned.
 */
static int ScanIllegal()
{
IllegalCharacter(nextChar);
getNextChar();
}
/*
 * Scan a word: the current character followed by any run of letter-class
 * and digit-class characters.  The text is looked up with Ident_Lookup();
 * identifiers up to lastKeywordIdent are keywords and map directly onto
 * keyword tokens, anything else becomes TIDENTIFIER with a T_IDENT node
 * holding the identifier.
 */
static int ScanLetter()
{
	register Ident ident;
	register int cls;

	for (;;) {
		collect(&nextTokenBuffer, nextChar);
		getNextChar();
		cls = charClass[nextChar];
		if (cls != CLETTER && cls != CDIGIT) {
			break;
		}
	}
	collect(&nextTokenBuffer, '\0');

	/* The length passed to the lookup excludes the terminating NUL. */
	ident = Ident_Lookup(nextTokenBuffer.buffer,
		nextTokenBuffer.fillPtr - nextTokenBuffer.buffer - 1);

	if (ident > lastKeywordIdent) {
		nextToken = TIDENTIFIER;
		nextTokenValue = NewNode(T_IDENT);
		nextTokenValue->b.ident.ident = ident;
	} else {
		nextToken = firstKeyword + ident;
	}
}
/*
 * Scan an operator name: the current character followed by any run of
 * operator-class characters.  The text is looked up like an identifier;
 * identifiers up to lastKeywordIdent map directly onto keyword tokens,
 * anything else becomes TOPERATOR with a T_IDENT node holding the
 * identifier.
 */
static int ScanOperator()
{
	register Ident ident;
	register int cls;

	for (;;) {
		collect(&nextTokenBuffer, nextChar);
		getNextChar();
		cls = charClass[nextChar];
		if (cls != COPERATOR) {
			break;
		}
	}
	collect(&nextTokenBuffer, '\0');

	/* The length passed to the lookup excludes the terminating NUL. */
	ident = Ident_Lookup(nextTokenBuffer.buffer,
		nextTokenBuffer.fillPtr - nextTokenBuffer.buffer - 1);

	if (ident > lastKeywordIdent) {
		nextToken = TOPERATOR;
		nextTokenValue = NewNode(T_IDENT);
		nextTokenValue->b.ident.ident = ident;
	} else {
		nextToken = firstKeyword + ident;
	}
}
/*
 * Scan a token that starts with ':'.  ":=" yields TSUGARASSIGN; a ':'
 * followed by anything else yields TCOLON and leaves that following
 * character as the lookahead.
 */
static int ScanColon()
{
	getNextChar();
	if (nextChar != '=') {
		nextToken = TCOLON;
	} else {
		getNextChar();
		nextToken = TSUGARASSIGN;
	}
}
/* Handler for class CLPAREN ('('): consume the character and report TLPAREN. */
static int ScanLparen()
{
getNextChar();
nextToken = TLPAREN;
}
/* Handler for class CRPAREN (')'): consume the character and report TRPAREN. */
static int ScanRparen()
{
getNextChar();
nextToken = TRPAREN;
}
/*
 * Scan a numeric literal.  A run of digits is collected; if a '.' follows,
 * the '.' and any digits after it are collected too and the token is
 * TREALLITERAL, otherwise it is TINTEGERLITERAL.  The token value is a
 * T_STRING node holding a copy of the literal's text.
 */
static int ScanDigit()
{
	/* Integer part. */
	for (;;) {
		collect(&nextTokenBuffer, nextChar);
		getNextChar();
		if (charClass[nextChar] != CDIGIT) {
			break;
		}
	}

	if (nextChar != '.') {
		nextToken = TINTEGERLITERAL;
	} else {
		/* The '.' itself, then the fraction digits (possibly none). */
		for (;;) {
			collect(&nextTokenBuffer, nextChar);
			getNextChar();
			if (charClass[nextChar] != CDIGIT) {
				break;
			}
		}
		nextToken = TREALLITERAL;
	}

	collect(&nextTokenBuffer, '\0');
	nextTokenValue = NewNode(T_STRING);
	nextTokenValue->b.string.string = strdup(nextTokenBuffer.buffer);
}
/* Handler for class CDOT ('.'): consume the character and report TDOT. */
static int ScanDot()
{
getNextChar();
nextToken = TDOT;
}
/* Handler for class CCOMMA (','): consume the character and report TCOMMA. */
static int ScanComma()
{
getNextChar();
nextToken = TCOMMA;
}
/*
 * Skip a comment: everything from the comment character ('%' in the class
 * table) up to, but not including, the newline that ends the line.  No
 * token is produced.
 *
 * The loop also stops at end of input.  A comment on a last line that has
 * no trailing newline never reaches a CNL character, and the loop used to
 * keep fetching characters past the end of the input.
 */
static int ScanComment()
{
	do {
		getNextChar();
	} while (nextChar != -1 && charClass[nextChar] != CNL);
}
/* Handler for class CLSQUARE ('['): consume the character and report TLSQUARE. */
static int ScanLsquare()
{
getNextChar();
nextToken = TLSQUARE;
}
/* Handler for class CRSQUARE (']'): consume the character and report TRSQUARE. */
static int ScanRsquare()
{
getNextChar();
nextToken = TRSQUARE;
}
/* Handler for class CLCURLY ('{'): consume the character and report TLCURLY. */
static int ScanLcurly()
{
getNextChar();
nextToken = TLCURLY;
}
/* Handler for class CRCURLY ('}'): consume the character and report TRCURLY. */
static int ScanRcurly()
{
getNextChar();
nextToken = TRCURLY;
}
/* Handler for class CDOLLAR ('$'): consume the character and report TDOLLAR. */
static int ScanDollar()
{
getNextChar();
nextToken = TDOLLAR;
}
/*
 * Consume one (possibly escaped) character of a string or character literal
 * and return its value.
 *
 *   \^X          the low five bits of X
 *   \d \dd \ddd  octal escape of one to three digits
 *   \n \b \t \r \f   the usual C control characters
 *   \X           any other X stands for itself
 *   X            an unescaped character stands for itself
 *
 * On return nextChar is the first character after the one consumed.
 */
static int DoAChar()
{
	register int value;
	register int digits;

	if ((char) nextChar != '\\') {
		/* Ordinary, unescaped character. */
		value = nextChar;
		getNextChar();
		return(value);
	}

	getNextChar();			/* step over the backslash */

	if (nextChar == '^') {
		getNextChar();
		value = nextChar & 0x1f;
		getNextChar();
		return(value);
	}

	if ('0' <= nextChar && nextChar <= '7') {
		/* a C octal escape: at most three digits */
		value = 0;
		for (digits = 0;
		     digits < 3 && '0' <= nextChar && nextChar <= '7';
		     digits++) {
			value = value * 8 + (nextChar - '0');
			getNextChar();
		}
		return(value);
	}

	switch (nextChar) {
	case 'n':	value = '\n';		break;
	case 'b':	value = '\b';		break;
	case 't':	value = '\t';		break;
	case 'r':	value = '\r';		break;
	case 'f':	value = '\f';		break;
	default:	value = nextChar;	break;
	}
	getNextChar();
	return(value);
}
/*
 * Scan a string literal.  Characters are collected through DoAChar() until
 * the closing '"', which is consumed, or until end of input, which is
 * reported with UnexpectedEndOfFile().  Either way the token is
 * TSTRINGLITERAL and its value is a T_STRING node holding a copy of the
 * collected text.
 */
static int ScanStringquote()
{
	getNextChar();			/* step over the opening quote */

	while (nextChar != -1 && nextChar != '"') {
		collect(&nextTokenBuffer, DoAChar());
	}
	if (nextChar == -1) {
		UnexpectedEndOfFile();
	} else {
		getNextChar();		/* step over the closing quote */
	}

	collect(&nextTokenBuffer, '\0');
	nextToken = TSTRINGLITERAL;
	nextTokenValue = NewNode(T_STRING);
	nextTokenValue->b.string.string = strdup(nextTokenBuffer.buffer);
}
/*
 * Scan a character literal: an opening "'", one (possibly escaped)
 * character read through DoAChar(), and a closing "'".  A missing closing
 * quote is reported as a syntax error and the offending character is
 * skipped.  The token is TCHARACTERLITERAL and its value is a T_STRING node
 * holding a copy of the collected text.
 *
 * End of input is handled the way ScanStringquote() handles it: it is
 * reported through UnexpectedEndOfFile() and nothing further is consumed.
 * Previously the routine went on to read a character and a closing quote
 * after the end of the input had already been reached.
 */
static int ScanCharquote()
{
	getNextChar();			/* step over the opening quote */

	if (nextChar == -1) {
		UnexpectedEndOfFile();
	} else {
		collect(&nextTokenBuffer, DoAChar());
		if (nextChar != '\'') {
			BeginSyntaxErrorMessage(1);
			ErrorWrite("expected \"'\"");
			EndErrorMessage();
		}
		/* Step over the closing quote (or the offender), but never past EOF. */
		if (nextChar != -1) {
			getNextChar();
		}
	}

	collect(&nextTokenBuffer, '\0');
	nextToken = TCHARACTERLITERAL;
	nextTokenValue = NewNode(T_STRING);
	nextTokenValue->b.string.string = strdup(nextTokenBuffer.buffer);
}
/* Handler for class CEOF: report TEOF.  Nothing is consumed. */
static int ScanEof()
{
nextToken = TEOF;
}
/*
 * Skip a run of white-space characters.  No token is produced, so scan()
 * goes on to dispatch on the character that follows.
 */
static int ScanWhite()
{
	getNextChar();
	while (charClass[nextChar] == CWHITE) {
		getNextChar();
	}
}
/*
 * Handler for class CNL: consume the newline.  No token is produced, so
 * scan() goes on to dispatch on the character that follows.
 */
static int ScanNL()
{
getNextChar();
}
/* This one scans until it finds the next token, leaving it in nextToken* */
scan()
{
nextToken = TNO;
do {
currentPosition = positionInLine;
(*charRoutines[charClass[nextChar]])();
} while (nextToken == TNO);
if (nextTokenValue == (NodePtr) -1) nextTokenValue = (NodePtr) nextToken;
}
/*
 * Set up the scanner: allocate the token text buffer (80 bytes) and the
 * line buffer (100 bytes), both empty, then initialize the identifier and
 * keyword tables through Ident_Initialize() and Keyword_Initialize().
 */
void Scanner_Initialize()
{
	register char *storage;

	junk[0] = CEOF;

	storage = (char *) malloc(80);
	nextTokenBuffer.buffer = storage;
	nextTokenBuffer.bufferEnd = storage + 80;
	nextTokenBuffer.fillPtr = storage;

	storage = (char *) malloc(100);
	lineBuffer.buffer = storage;
	lineBuffer.bufferEnd = storage + 100;
	lineBuffer.fillPtr = storage;

	Ident_Initialize();
	Keyword_Initialize();
}
/*
 * Token source for the parser (the name and yylval suggest a yacc-generated
 * parser - confirm).  Advances the scanner by one token with
 * Scanner_Accept(), stores the newly scanned token's value in yylval and
 * returns its token code.
 */
int yylex()
{
Scanner_Accept();
yylval = nextTokenValue;
return(nextToken);
}